diff --git a/assets/src/bundles/admin/deposit.js b/assets/src/bundles/admin/deposit.js --- a/assets/src/bundles/admin/deposit.js +++ b/assets/src/bundles/admin/deposit.js @@ -1,17 +1,23 @@ /** - * Copyright (C) 2018-2021 The Software Heritage developers + * Copyright (C) 2018-2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ function genSwhLink(data, type) { - if (type === 'display') { - if (data && data.startsWith('swh')) { - const browseUrl = Urls.browse_swhid(data); - const formattedSWHID = data.replace(/;/g, ';
'); - return `${formattedSWHID}`; - } + if (type === 'display' && data && data.startsWith('swh')) { + const browseUrl = Urls.browse_swhid(data); + const formattedSWHID = data.replace(/;/g, ';
'); + return `${formattedSWHID}`; + } + return data; +} + +function genLink(data, type) { + if (type === 'display' && data) { + const sData = encodeURI(data); + return `${sData}`; } return data; } @@ -55,23 +61,14 @@ name: 'id' }, { - data: 'swhid_context', - name: 'swhid_context', + data: 'type', + name: 'type' + }, + { + data: 'uri', + name: 'uri', render: (data, type, row) => { - if (data && type === 'display') { - const originPattern = ';origin='; - const originPatternIdx = data.indexOf(originPattern); - if (originPatternIdx !== -1) { - let originUrl = data.slice(originPatternIdx + originPattern.length); - const nextSepPattern = ';'; - const nextSepPatternIdx = originUrl.indexOf(nextSepPattern); - if (nextSepPatternIdx !== -1) { /* Remove extra context */ - originUrl = originUrl.slice(0, nextSepPatternIdx); - } - return `${originUrl}`; - } - } - return data; + return genLink(data, type); } }, { diff --git a/cypress/integration/deposit-admin.spec.js b/cypress/integration/deposit-admin.spec.js --- a/cypress/integration/deposit-admin.spec.js +++ b/cypress/integration/deposit-admin.spec.js @@ -1,5 +1,5 @@ /** - * Copyright (C) 2020-2021 The Software Heritage developers + * Copyright (C) 2020-2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information @@ -14,30 +14,36 @@ responseDeposits = [ { 'id': 614, + 'type': 'code', 'external_id': 'ch-de-1', 'reception_date': '2020-05-18T13:48:27Z', 'status': 'done', 'status_detail': null, 'swhid': 'swh:1:dir:ef04a768', - 'swhid_context': 'swh:1:dir:ef04a768;origin=https://w.s.o/c-d-1;visit=swh:1:snp:b234be1e;anchor=swh:1:rev:d24a75c9;path=/' + 'swhid_context': 'swh:1:dir:ef04a768;origin=https://w.s.o/c-d-1;visit=swh:1:snp:b234be1e;anchor=swh:1:rev:d24a75c9;path=/', + 'uri': 'https://w.s.o/c-d-1' }, { 'id': 613, + 'type': 'code', 'external_id': 'ch-de-2', 'reception_date': '2020-05-18T11:20:16Z', 'status': 'done', 'status_detail': null, 'swhid': 'swh:1:dir:181417fb', - 'swhid_context': 'swh:1:dir:181417fb;origin=https://w.s.o/c-d-2;visit=swh:1:snp:8c32a2ef;anchor=swh:1:rev:3d1eba04;path=/' + 'swhid_context': 'swh:1:dir:181417fb;origin=https://w.s.o/c-d-2;visit=swh:1:snp:8c32a2ef;anchor=swh:1:rev:3d1eba04;path=/', + 'uri': 'https://w.s.o/c-d-2' }, { 'id': 612, + 'type': 'code', 'external_id': 'ch-de-3', 'reception_date': '2020-05-18T11:20:16Z', 'status': 'rejected', 'status_detail': 'incomplete deposit!', 'swhid': null, - 'swhid_context': null + 'swhid_context': null, + 'uri': null } ]; // those are computed from the @@ -87,6 +93,8 @@ assert.isNotNull(deposit); assert.isNotNull(responseDeposit); expect(deposit.id).to.be.equal(responseDeposit['id']); + expect(deposit.uri).to.be.equal(responseDeposit['uri']); + expect(deposit.type).to.be.equal(responseDeposit['type']); expect(deposit.external_id).to.be.equal(responseDeposit['external_id']); expect(deposit.status).to.be.equal(responseDeposit['status']); expect(deposit.status_detail).to.be.equal(responseDeposit['status_detail']); diff --git a/swh/web/admin/deposit.py b/swh/web/admin/deposit.py --- a/swh/web/admin/deposit.py +++ b/swh/web/admin/deposit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2021 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -13,7 +13,11 @@ from swh.web.admin.adminurls import admin_route from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION -from swh.web.common.utils import get_deposits_list +from swh.web.common.utils import ( + get_deposits_list, + parse_swh_deposit_origin, + parse_swh_metadata_provenance, +) def _can_list_deposits(user): @@ -70,9 +74,11 @@ data = paginator.page(page).object_list table_data["recordsTotal"] = deposits_count table_data["recordsFiltered"] = len(deposits) - table_data["data"] = [ - { + data_list = [] + for d in data: + data_dict = { "id": d["id"], + "type": d["type"], "external_id": d["external_id"], "reception_date": d["reception_date"], "status": d["status"], @@ -80,13 +86,39 @@ "swhid": d["swhid"], "swhid_context": d["swhid_context"], } - for d in data - ] + provenance = None + raw_metadata = d["raw_metadata"] + # Try to determine provenance out of the raw metadata + if raw_metadata and d["type"] == "meta": # metadata provenance + provenance = parse_swh_metadata_provenance(d["raw_metadata"]) + elif raw_metadata and d["type"] == "code": + provenance = parse_swh_deposit_origin(raw_metadata) + + if not provenance and d["origin_url"]: + provenance = d["origin_url"] + + # Finally, if still not found, we determine uri using the swhid + if not provenance and d["swhid_context"]: + # Trying to compute the origin as we did before in the js + from swh.model.swhids import QualifiedSWHID + + swhid = QualifiedSWHID.from_string(d["swhid_context"]) + provenance = swhid.origin + + data_dict["uri"] = provenance # could be None + + # This could be large. As this is not displayed yet, drop it to avoid + # cluttering the data dict + data_dict.pop("raw_metadata", None) + + data_list.append(data_dict) + + table_data["data"] = data_list except Exception as exc: sentry_sdk.capture_exception(exc) - table_data["error"] = ( - "An error occurred while retrieving " "the list of deposits !" - ) + table_data[ + "error" + ] = "An error occurred while retrieving the list of deposits !" return JsonResponse(table_data) diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -8,6 +8,7 @@ import re from typing import Any, Dict, List, Optional import urllib.parse +from xml.etree import ElementTree from bs4 import BeautifulSoup from docutils.core import publish_parts @@ -447,3 +448,69 @@ request_path = resolve(request.path_info) args = {**request_path.kwargs, **request.GET.dict()} return redirect(reverse(new_route, query_params=args), permanent=permanent,) + + +NAMESPACES = { + "swh": "https://www.softwareheritage.org/schema/2018/deposit", + "schema": "http://schema.org/", +} + + +def parse_swh_metadata_provenance(raw_metadata: str) -> Optional[str]: + """Parse swh metadata-provenance out of the raw metadata deposit. If found, returns the + value, None otherwise. + + .. code-block:: xml + + + + https://example.org/metadata/url + + + + Args: + raw_metadata: raw metadata out of deposits received + + Returns: + Either the metadata provenance url if any or None otherwise + + """ + metadata = ElementTree.fromstring(raw_metadata) + url = metadata.findtext( + "swh:deposit/swh:metadata-provenance/schema:url", namespaces=NAMESPACES, + ) + return url or None + + +def parse_swh_deposit_origin(raw_metadata: str) -> Optional[str]: + """Parses and from metadata document, + if any. They are mutually exclusive and tested as such in the deposit. + + .. code-block:: xml + + + + + + + + .. code-block:: xml + + + + + + + + Returns: + The one not null if any, None otherwise + + """ + metadata = ElementTree.fromstring(raw_metadata) + for origin_tag in ["create_origin", "add_to_origin"]: + elt = metadata.find( + f"swh:deposit/swh:{origin_tag}/swh:origin[@url]", namespaces=NAMESPACES + ) + if elt is not None: + return elt.attrib["url"] + return None diff --git a/swh/web/templates/admin/deposit.html b/swh/web/templates/admin/deposit.html --- a/swh/web/templates/admin/deposit.html +++ b/swh/web/templates/admin/deposit.html @@ -30,19 +30,21 @@
Toggle column: id - - origin - - reception date - - status - - status detail - - directory - - directory with context + type - + uri - + reception date - + status - + status detail - + directory - + directory with context

- + + diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py --- a/swh/web/tests/common/test_utils.py +++ b/swh/web/tests/common/test_utils.py @@ -1,10 +1,10 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information - from base64 import b64encode import datetime +from os.path import join from urllib.parse import quote import pytest @@ -314,3 +314,44 @@ def test_is_swh_web_production(request_factory): request = request_factory.get("/", SERVER_NAME=SWH_WEB_SERVER_NAME) assert utils.is_swh_web_production(request) + + +@pytest.mark.parametrize( + "raw_metadata_file,expected_url", + [ + ("raw-metadata-provenance.xml", "https://example.org/metadata/provenance"), + ("raw-metadata-no-swh.xml", None), + ], +) +def test_parse_swh_provenance(datadir, raw_metadata_file, expected_url): + metadata_path = join(datadir, "deposit", raw_metadata_file) + with open(metadata_path, "r") as f: + raw_metadata = f.read() + + actual_url = utils.parse_swh_metadata_provenance(raw_metadata) + + assert actual_url == expected_url + + +@pytest.mark.parametrize( + "raw_metadata_file,expected_url", + [ + ( + "raw-metadata-create-origin.xml", + "https://example.org/metadata/create-origin", + ), + ( + "raw-metadata-add-to-origin.xml", + "https://example.org/metadata/add-to-origin", + ), + ("raw-metadata-no-swh.xml", None), + ], +) +def test_parse_swh_origins(datadir, raw_metadata_file, expected_url): + metadata_path = join(datadir, "deposit", raw_metadata_file) + with open(metadata_path, "r") as f: + raw_metadata = f.read() + + actual_url = utils.parse_swh_deposit_origin(raw_metadata) + + assert actual_url == expected_url diff --git a/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml b/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml new file mode 100644 --- /dev/null +++ b/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml @@ -0,0 +1,13 @@ + + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + dudess + + + + + + diff --git a/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml b/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml new file mode 100644 --- /dev/null +++ b/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml @@ -0,0 +1,13 @@ + + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a + dudess + + + + + + diff --git a/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml b/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml new file mode 100644 --- /dev/null +++ b/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml @@ -0,0 +1,7 @@ + + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a + dudess + diff --git a/swh/web/tests/resources/deposit/raw-metadata-provenance.xml b/swh/web/tests/resources/deposit/raw-metadata-provenance.xml new file mode 100644 --- /dev/null +++ b/swh/web/tests/resources/deposit/raw-metadata-provenance.xml @@ -0,0 +1,14 @@ + + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a + dudess + + + https://example.org/metadata/provenance + + +
idorigintypeuri reception date status status detail